;************************************************************************
;*
;* Copyright:
;*	Freescale Semiconductor, INC. All Rights Reserved.  
;*  You are hereby granted a copyright license to use, modify, and
;*  distribute the SOFTWARE so long as this entire notice is
;*  retained without alteration in any modified and/or redistributed
;*  versions, and that such modified versions are clearly identified
;*  as such. No licenses are granted by implication, estoppel or
;*  otherwise under any patents or trademarks of Freescale Semiconductor, 
;*  Inc. This software is provided on an "AS IS" basis and without warranty.
;*
;*  To the maximum extent permitted by applicable law, FREESCALE 
;*  DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING 
;*  IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR
;*  PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH REGARD TO THE 
;*  SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) AND ANY 
;*  ACCOMPANYING WRITTEN MATERIALS.
;* 
;*  To the maximum extent permitted by applicable law, IN NO EVENT
;*  SHALL FREESCALE BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING 
;*  WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS 
;*  INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
;*  LOSS) ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.   
;* 
;*  Freescale assumes no responsibility for the maintenance and support
;*  of this software
;*************************************************************************
;*
;*  FILE NAME : fft16.s
;*
;*  PURPOSE   : contains optimized versions of init_data16(), fft16() and 
;*              inv_fft16() functions  
;*
;*  AUTHORS   : Anatoly Khaynakov and Andriy Tymkiv
;*
;*************************************************************************
	.section .data,4,r

;------------------------------------------------------------------------
;TF_table - twiddle factors used by the butterfly loops of _fft16_mac
;
;Layout : 512 entries of two 16-bit words each, wr first, then wi.
;         The butterfly loops fetch one entry with a single long read,
;         so wr arrives in the upper word and wi in the lower word.
;Values : signed 16-bit fractions (0x8000 = -1.0). Entry k holds
;             wr = -cos(2*pi*k/1024)
;             wi = -sin(2*pi*k/1024)
;         e.g. entry 0 = 0x8000,0x0 and entry 256 = 0x0,0x8000.
;Access : the intermediate stages step through the table with a stride
;         of d6 bytes (512 on the 3rd stage, halved on every following
;         stage); the last stage walks it entry by entry (4 bytes).
;------------------------------------------------------------------------
TF_table:
	.word	0x8000,0x0,0x8001,0xff37,0x8003,0xfe6e,0x8006,0xfda5
	.word	0x800a,0xfcdc,0x8010,0xfc13,0x8017,0xfb4a,0x801f,0xfa81
	.word	0x8028,0xf9b9,0x8032,0xf8f0,0x803e,0xf827,0x804b,0xf75e
	.word	0x8059,0xf696,0x8069,0xf5cd,0x8079,0xf505,0x808b,0xf43d
	.word	0x809e,0xf375,0x80b3,0xf2ad,0x80c8,0xf1e5,0x80df,0xf11d
	.word	0x80f7,0xf055,0x8110,0xef8e,0x812b,0xeec7,0x8146,0xedff
	.word	0x8163,0xed38,0x8181,0xec72,0x81a1,0xebab,0x81c1,0xeae5
	.word	0x81e3,0xea1e,0x8206,0xe958,0x822a,0xe893,0x8250,0xe7cd
	.word	0x8276,0xe708,0x829e,0xe643,0x82c7,0xe57e,0x82f1,0xe4b9
	.word	0x831d,0xe3f5,0x8349,0xe331,0x8377,0xe26d,0x83a6,0xe1aa
	.word	0x83d7,0xe0e7,0x8408,0xe024,0x843b,0xdf61,0x846e,0xde9f
	.word	0x84a3,0xdddd,0x84da,0xdd1b,0x8511,0xdc5a,0x854a,0xdb99
	.word	0x8583,0xdad8,0x85be,0xda18,0x85fb,0xd958,0x8638,0xd899
	.word	0x8676,0xd7da,0x86b6,0xd71b,0x86f7,0xd65d,0x8739,0xd59f
	.word	0x877c,0xd4e1,0x87c0,0xd424,0x8806,0xd368,0x884c,0xd2ab
	.word	0x8894,0xd1ef,0x88dd,0xd134,0x8927,0xd079,0x8972,0xcfbf
	.word	0x89bf,0xcf05,0x8a0c,0xce4b,0x8a5b,0xcd92,0x8aab,0xccda
	.word	0x8afc,0xcc22,0x8b4e,0xcb6a,0x8ba1,0xcab3,0x8bf5,0xc9fc
	.word	0x8c4b,0xc946,0x8ca1,0xc891,0x8cf9,0xc7dc,0x8d51,0xc728
	.word	0x8dab,0xc674,0x8e06,0xc5c0,0x8e62,0xc50e,0x8ebf,0xc45b
	.word	0x8f1e,0xc3aa,0x8f7d,0xc2f9,0x8fdd,0xc248,0x903f,0xc198
	.word	0x90a1,0xc0e9,0x9105,0xc03b,0x916a,0xbf8d,0x91d0,0xbedf
	.word	0x9236,0xbe32,0x929e,0xbd86,0x9307,0xbcdb,0x9371,0xbc30
	.word	0x93dc,0xbb86,0x9448,0xbadc,0x94b6,0xba33,0x9524,0xb98b
	.word	0x9593,0xb8e4,0x9603,0xb83d,0x9674,0xb797,0x96e7,0xb6f1
	.word	0x975a,0xb64c,0x97ce,0xb5a8,0x9843,0xb505,0x98ba,0xb462
	.word	0x9931,0xb3c1,0x99a9,0xb31f,0x9a23,0xb27f,0x9a9d,0xb1df
	.word	0x9b18,0xb141,0x9b94,0xb0a2,0x9c11,0xb005,0x9c8f,0xaf69
	.word	0x9d0e,0xaecd,0x9d8f,0xae32,0x9e0f,0xad97,0x9e91,0xacfe
	.word	0x9f14,0xac65,0x9f98,0xabcd,0xa01d,0xab36,0xa0a2,0xaaa0
	.word	0xa129,0xaa0b,0xa1b0,0xa976,0xa239,0xa8e3,0xa2c2,0xa850
	.word	0xa34c,0xa7be,0xa3d7,0xa72c,0xa463,0xa69c,0xa4f0,0xa60d
	.word	0xa57e,0xa57e,0xa60d,0xa4f0,0xa69c,0xa463,0xa72c,0xa3d7
	.word	0xa7be,0xa34c,0xa850,0xa2c2,0xa8e3,0xa239,0xa976,0xa1b0
	.word	0xaa0b,0xa129,0xaaa0,0xa0a2,0xab36,0xa01d,0xabcd,0x9f98
	.word	0xac65,0x9f14,0xacfe,0x9e91,0xad97,0x9e0f,0xae32,0x9d8f
	.word	0xaecd,0x9d0e,0xaf69,0x9c8f,0xb005,0x9c11,0xb0a2,0x9b94
	.word	0xb141,0x9b18,0xb1df,0x9a9d,0xb27f,0x9a23,0xb31f,0x99a9
	.word	0xb3c1,0x9931,0xb462,0x98ba,0xb505,0x9843,0xb5a8,0x97ce
	.word	0xb64c,0x975a,0xb6f1,0x96e7,0xb797,0x9674,0xb83d,0x9603
	.word	0xb8e4,0x9593,0xb98b,0x9524,0xba33,0x94b6,0xbadc,0x9448
	.word	0xbb86,0x93dc,0xbc30,0x9371,0xbcdb,0x9307,0xbd86,0x929e
	.word	0xbe32,0x9236,0xbedf,0x91d0,0xbf8d,0x916a,0xc03b,0x9105
	.word	0xc0e9,0x90a1,0xc198,0x903f,0xc248,0x8fdd,0xc2f9,0x8f7d
	.word	0xc3aa,0x8f1e,0xc45b,0x8ebf,0xc50e,0x8e62,0xc5c0,0x8e06
	.word	0xc674,0x8dab,0xc728,0x8d51,0xc7dc,0x8cf9,0xc891,0x8ca1
	.word	0xc946,0x8c4b,0xc9fc,0x8bf5,0xcab3,0x8ba1,0xcb6a,0x8b4e
	.word	0xcc22,0x8afc,0xccda,0x8aab,0xcd92,0x8a5b,0xce4b,0x8a0c
	.word	0xcf05,0x89bf,0xcfbf,0x8972,0xd079,0x8927,0xd134,0x88dd
	.word	0xd1ef,0x8894,0xd2ab,0x884c,0xd368,0x8806,0xd424,0x87c0
	.word	0xd4e1,0x877c,0xd59f,0x8739,0xd65d,0x86f7,0xd71b,0x86b6
	.word	0xd7da,0x8676,0xd899,0x8638,0xd958,0x85fb,0xda18,0x85be
	.word	0xdad8,0x8583,0xdb99,0x854a,0xdc5a,0x8511,0xdd1b,0x84da
	.word	0xdddd,0x84a3,0xde9f,0x846e,0xdf61,0x843b,0xe024,0x8408
	.word	0xe0e7,0x83d7,0xe1aa,0x83a6,0xe26d,0x8377,0xe331,0x8349
	.word	0xe3f5,0x831d,0xe4b9,0x82f1,0xe57e,0x82c7,0xe643,0x829e
	.word	0xe708,0x8276,0xe7cd,0x8250,0xe893,0x822a,0xe958,0x8206
	.word	0xea1e,0x81e3,0xeae5,0x81c1,0xebab,0x81a1,0xec72,0x8181
	.word	0xed38,0x8163,0xedff,0x8146,0xeec7,0x812b,0xef8e,0x8110
	.word	0xf055,0x80f7,0xf11d,0x80df,0xf1e5,0x80c8,0xf2ad,0x80b3
	.word	0xf375,0x809e,0xf43d,0x808b,0xf505,0x8079,0xf5cd,0x8069
	.word	0xf696,0x8059,0xf75e,0x804b,0xf827,0x803e,0xf8f0,0x8032
	.word	0xf9b9,0x8028,0xfa81,0x801f,0xfb4a,0x8017,0xfc13,0x8010
	.word	0xfcdc,0x800a,0xfda5,0x8006,0xfe6e,0x8003,0xff37,0x8001
	.word	0x0,0x8000,0xc9,0x8001,0x192,0x8003,0x25b,0x8006
	.word	0x324,0x800a,0x3ed,0x8010,0x4b6,0x8017,0x57f,0x801f
	.word	0x647,0x8028,0x710,0x8032,0x7d9,0x803e,0x8a2,0x804b
	.word	0x96a,0x8059,0xa33,0x8069,0xafb,0x8079,0xbc3,0x808b
	.word	0xc8b,0x809e,0xd53,0x80b3,0xe1b,0x80c8,0xee3,0x80df
	.word	0xfab,0x80f7,0x1072,0x8110,0x1139,0x812b,0x1201,0x8146
	.word	0x12c8,0x8163,0x138e,0x8181,0x1455,0x81a1,0x151b,0x81c1
	.word	0x15e2,0x81e3,0x16a8,0x8206,0x176d,0x822a,0x1833,0x8250
	.word	0x18f8,0x8276,0x19bd,0x829e,0x1a82,0x82c7,0x1b47,0x82f1
	.word	0x1c0b,0x831d,0x1ccf,0x8349,0x1d93,0x8377,0x1e56,0x83a6
	.word	0x1f19,0x83d7,0x1fdc,0x8408,0x209f,0x843b,0x2161,0x846e
	.word	0x2223,0x84a3,0x22e5,0x84da,0x23a6,0x8511,0x2467,0x854a
	.word	0x2528,0x8583,0x25e8,0x85be,0x26a8,0x85fb,0x2767,0x8638
	.word	0x2826,0x8676,0x28e5,0x86b6,0x29a3,0x86f7,0x2a61,0x8739
	.word	0x2b1f,0x877c,0x2bdc,0x87c0,0x2c98,0x8806,0x2d55,0x884c
	.word	0x2e11,0x8894,0x2ecc,0x88dd,0x2f87,0x8927,0x3041,0x8972
	.word	0x30fb,0x89bf,0x31b5,0x8a0c,0x326e,0x8a5b,0x3326,0x8aab
	.word	0x33de,0x8afc,0x3496,0x8b4e,0x354d,0x8ba1,0x3604,0x8bf5
	.word	0x36ba,0x8c4b,0x376f,0x8ca1,0x3824,0x8cf9,0x38d8,0x8d51
	.word	0x398c,0x8dab,0x3a40,0x8e06,0x3af2,0x8e62,0x3ba5,0x8ebf
	.word	0x3c56,0x8f1e,0x3d07,0x8f7d,0x3db8,0x8fdd,0x3e68,0x903f
	.word	0x3f17,0x90a1,0x3fc5,0x9105,0x4073,0x916a,0x4121,0x91d0
	.word	0x41ce,0x9236,0x427a,0x929e,0x4325,0x9307,0x43d0,0x9371
	.word	0x447a,0x93dc,0x4524,0x9448,0x45cd,0x94b6,0x4675,0x9524
	.word	0x471c,0x9593,0x47c3,0x9603,0x4869,0x9674,0x490f,0x96e7
	.word	0x49b4,0x975a,0x4a58,0x97ce,0x4afb,0x9843,0x4b9e,0x98ba
	.word	0x4c3f,0x9931,0x4ce1,0x99a9,0x4d81,0x9a23,0x4e21,0x9a9d
	.word	0x4ebf,0x9b18,0x4f5e,0x9b94,0x4ffb,0x9c11,0x5097,0x9c8f
	.word	0x5133,0x9d0e,0x51ce,0x9d8f,0x5269,0x9e0f,0x5302,0x9e91
	.word	0x539b,0x9f14,0x5433,0x9f98,0x54ca,0xa01d,0x5560,0xa0a2
	.word	0x55f5,0xa129,0x568a,0xa1b0,0x571d,0xa239,0x57b0,0xa2c2
	.word	0x5842,0xa34c,0x58d4,0xa3d7,0x5964,0xa463,0x59f3,0xa4f0
	.word	0x5a82,0xa57e,0x5b10,0xa60d,0x5b9d,0xa69c,0x5c29,0xa72c
	.word	0x5cb4,0xa7be,0x5d3e,0xa850,0x5dc7,0xa8e3,0x5e50,0xa976
	.word	0x5ed7,0xaa0b,0x5f5e,0xaaa0,0x5fe3,0xab36,0x6068,0xabcd
	.word	0x60ec,0xac65,0x616f,0xacfe,0x61f1,0xad97,0x6271,0xae32
	.word	0x62f2,0xaecd,0x6371,0xaf69,0x63ef,0xb005,0x646c,0xb0a2
	.word	0x64e8,0xb141,0x6563,0xb1df,0x65dd,0xb27f,0x6657,0xb31f
	.word	0x66cf,0xb3c1,0x6746,0xb462,0x67bd,0xb505,0x6832,0xb5a8
	.word	0x68a6,0xb64c,0x6919,0xb6f1,0x698c,0xb797,0x69fd,0xb83d
	.word	0x6a6d,0xb8e4,0x6adc,0xb98b,0x6b4a,0xba33,0x6bb8,0xbadc
	.word	0x6c24,0xbb86,0x6c8f,0xbc30,0x6cf9,0xbcdb,0x6d62,0xbd86
	.word	0x6dca,0xbe32,0x6e30,0xbedf,0x6e96,0xbf8d,0x6efb,0xc03b
	.word	0x6f5f,0xc0e9,0x6fc1,0xc198,0x7023,0xc248,0x7083,0xc2f9
	.word	0x70e2,0xc3aa,0x7141,0xc45b,0x719e,0xc50e,0x71fa,0xc5c0
	.word	0x7255,0xc674,0x72af,0xc728,0x7307,0xc7dc,0x735f,0xc891
	.word	0x73b5,0xc946,0x740b,0xc9fc,0x745f,0xcab3,0x74b2,0xcb6a
	.word	0x7504,0xcc22,0x7555,0xccda,0x75a5,0xcd92,0x75f4,0xce4b
	.word	0x7641,0xcf05,0x768e,0xcfbf,0x76d9,0xd079,0x7723,0xd134
	.word	0x776c,0xd1ef,0x77b4,0xd2ab,0x77fa,0xd368,0x7840,0xd424
	.word	0x7884,0xd4e1,0x78c7,0xd59f,0x7909,0xd65d,0x794a,0xd71b
	.word	0x798a,0xd7da,0x79c8,0xd899,0x7a05,0xd958,0x7a42,0xda18
	.word	0x7a7d,0xdad8,0x7ab6,0xdb99,0x7aef,0xdc5a,0x7b26,0xdd1b
	.word	0x7b5d,0xdddd,0x7b92,0xde9f,0x7bc5,0xdf61,0x7bf8,0xe024
	.word	0x7c29,0xe0e7,0x7c5a,0xe1aa,0x7c89,0xe26d,0x7cb7,0xe331
	.word	0x7ce3,0xe3f5,0x7d0f,0xe4b9,0x7d39,0xe57e,0x7d62,0xe643
	.word	0x7d8a,0xe708,0x7db0,0xe7cd,0x7dd6,0xe893,0x7dfa,0xe958
	.word	0x7e1d,0xea1e,0x7e3f,0xeae5,0x7e5f,0xebab,0x7e7f,0xec72
	.word	0x7e9d,0xed38,0x7eba,0xedff,0x7ed5,0xeec7,0x7ef0,0xef8e
	.word	0x7f09,0xf055,0x7f21,0xf11d,0x7f38,0xf1e5,0x7f4d,0xf2ad
	.word	0x7f62,0xf375,0x7f75,0xf43d,0x7f87,0xf505,0x7f97,0xf5cd
	.word	0x7fa7,0xf696,0x7fb5,0xf75e,0x7fc2,0xf827,0x7fce,0xf8f0
	.word	0x7fd8,0xf9b9,0x7fe1,0xfa81,0x7fe9,0xfb4a,0x7ff0,0xfc13
	.word	0x7ff6,0xfcdc,0x7ffa,0xfda5,0x7ffd,0xfe6e,0x7fff,0xff37

        .section .text,4,c		;code for the FFT routines starts here

        .align  4
					;entry points made visible to other modules;
					;_rev_addr_sort is not exported
	.xdef   _fft16_mac
	.xdef	_inv_fft16_mac


;******************************************************************************
;Bit reversal sorting
;******************************************************************************
;Reorders the complex samples held in ReX[] and ImX[] into bit-reversed index
;order, in place. The routine is fixed to 512 points: j starts at index 256
;and i runs from 1 to 510.
;
;Arguments (on the stack at entry):
;	4(a7) = address of ReX[], 8(a7) = address of ImX[]
;Register use (indices are kept as byte offsets, i.e. index*2):
;	d0 = k, d1 = i, d2 = j, d3/d4 = swap temporaries
;	a0/a1 = ReX/ImX base (still valid on return; _fft16_mac relies on this)
;	a2/a3 = &ReX[i] / &ImX[i]
;d0-d4 and a2-a3 are overwritten and not restored.
;******************************************************************************
;Corresponding C code:
;
;	void RevSort(void){
;	short i,j,k,n=512,nd2=n>>1;
;	short tr,ti;
;
;	j=nd2;
;	for (i=1;i<=n-2;i++){
;		if (i<j){			/* exchange each pair only once */
;			tr=ReX[j];
;			ti=ImX[j];
;			ReX[j]=ReX[i];
;			ImX[j]=ImX[i];
;			ReX[i]=tr;
;			ImX[i]=ti;
;	  	}
;		k=nd2;				/* add 1 to j, carrying from */
; 		while (k<=j){			/* the top bit downwards     */
;			j=j-k;
;			k=k>>1;
; 		}
;  		j=j+k;
;	}
;
;	}


_rev_addr_sort:
	movem.l	(4,a7),a0/a1		;a0 = &ReX[0], a1 = &ImX[0]
	moveq.l	#2,d1			;d1 = i*2, first i = 1
	move.l	#512,d2			;d2 = j*2, first j = nd2 = 256

rs_pair:				;---- one pass per index i ----
	cmp.l	d2,d1			;i >= j ?
	bcc.b	rs_step			;yes: pair already handled (or i == j)
	move.w	(a0,d2.l),d3		;tr = ReX[j]
	move.w	(a1,d2.l),d4		;ti = ImX[j]
	lea	(a0,d1.l),a2		;a2 = &ReX[i]
	lea	(a1,d1.l),a3		;a3 = &ImX[i]
	move.w	(a2),(a0,d2.l)		;ReX[j] = ReX[i]
	move.w	(a3),(a1,d2.l)		;ImX[j] = ImX[i]
	move.w	d3,(a2)			;ReX[i] = tr
	move.w	d4,(a3)			;ImX[i] = ti

rs_step:				;---- advance j to the next reversed index ----
	move.l	#512,d0			;k = nd2 (as a byte offset)
rs_carry:
	cmp.l	d0,d2			;while (k <= j)
	bcs.b	rs_add			;j < k: carry chain finished
	sub.l	d0,d2			;	j -= k  (clear this bit)
	lsr.l	#1,d0			;	k >>= 1 (move to the next lower bit)
	bra.b	rs_carry
rs_add:
	add.l	d0,d2			;j += k (set the first clear bit)

	addq.l	#2,d1			;i++
	cmp.l	#1022,d1		;stop once i reaches 511 (n-1)
	bcs.b	rs_pair
	rts

;**********************************************************************************
;Real FFT
;**********************************************************************************
;Upon entry, REX[ ] contains the real input signal, while values in IMX[ ] are
;ignored.
;Upon return, REX[ ] and IMX[ ] contain the DFT output.
;All signals run from 0 to 1023.
;
;Arguments (on the stack at entry):
;	4(a7) = address of REX[ ], 8(a7) = address of IMX[ ]
;Registers d0-d7/a0-a6 are saved on entry and restored before return.
;MACSR is written (0x70 while the butterflies run, 0 on exit) and ACC is
;overwritten; neither is restored to the value the caller had.
;
;Stack frame after the prologue (72 bytes):
;	 0..59(a7)	saved d0-d7/a0-a6
;	60(a7)		sub DFT loop counter
;	64(a7)		number of sub DFTs on the current stage
;	68(a7)		stage loop counter
;	76(a7),80(a7)	the REX[ ] and IMX[ ] arguments
;
;Processing steps:
;	1. split the 1024 real samples into 512 even (ReX) and 512 odd (ImX) points
;	2. bit reversal sort of the 512 complex points (_rev_addr_sort)
;	3. stages 1 and 2 of the 512-point complex FFT (no multiplications)
;	4. stages 3..9 using the MAC unit and TF_table
;	5. even/odd frequency domain decomposition
;	6. last stage, combining both halves into the 1024-point result
;**********************************************************************************
_fft16_mac:
	lea	-72(a7),a7		;60 bytes for the registers + 12 bytes of locals
	movem.l	d0-d7/a0-a6,(a7)	;store contents of all registers into stack

	;Corresponding C code:		;separate even and odd points
	;for (i=0;i<512;i++){
	;    ReX[i]=ReX[2*i];
	;    ImX[i]=ReX[2*i+1];
	;}

	movem.l	(76,a7),a0/a1		;separate even and odd points
					;point a0 and a1 to ReX and ImX buffers
	moveq.l	#0,d0			;d0 = byte offset of the input pair
	movea.l	a0,a2			;a2 = write pointer for ReX[i]
reorder:
	move.l	(0,a0,d0.l),d1		;d1 = ReX[2*i] (upper word), ReX[2*i+1] (lower word)
	move.w	d1,(a1)+		;ImX[i]=ReX[2*i+1];
	swap	d1
	move.w	d1,(a2)+		;ReX[i]=ReX[2*i];
	addq.l	#4,d0			;modification of loop counter
	cmpi.l	#2048,d0		;512 pairs * 4 bytes
	bcs.b	reorder

	lea	-1024(a1),a1		;bit reversal sorting
					;(a1 is moved back to the start of ImX[])
	move.l	a1,-(a7)		;push address of ImX[] buffer into the stack
	move.l	a0,-(a7)		;push address of ReX[] buffer into the stack
	jsr	_rev_addr_sort		;jump to subroutine
					;(returns with a0/a1 = ReX/ImX start)
	lea	(8,a7),a7; //addq.l	#8,a7

	moveq.l	#0,d6			;first stage of FFT
					;d6 counts the 256 butterflies
first_stage:
	move.l	(a0),d0			;d0 = ar,br
	move.l	d0,d2			;d2 = ar,br
	swap	d0			;d0 = br,ar
	ext.l	d0			;d0 = ar
	ext.l	d2			;d2 = br
	move.l	(a1),d1			;d1 = ai,bi
	move.l	d1,d3			;d3 = ai,bi
	swap	d1			;d1 = bi,ai
	ext.l	d1			;d1 = ai
	ext.l	d3			;d3 = bi
	move.l	d2,d4			;d4 = br
	move.l	d3,d5			;d5 = bi
					;on the first stage the butterfly operation
					;looks like:
					;xr = ar + br
					;xi = ai + bi
					;yr = ar - br
					;yi = ai - bi

	add.l	d0,d2			;xr = ar + br
	move.w	d2,(a0)+
	add.l	d1,d3			;xi = ai + bi
	move.w	d3,(a1)+
	sub.l	d4,d0			;yr = ar - br
	move.w	d0,(a0)+
	sub.l	d5,d1			;yi = ai - bi
	move.w	d1,(a1)+
	addq.l	#1,d6
	cmpi.l	#256,d6
	bcs.b	first_stage

	movea.l	#0,a6			;second stage of FFT
					;a6 counts the 128 groups of 4 points
	lea	(-1024,a0),a0		;a0 points to the beginning of ReX buffer
	lea	(-1024,a1),a1		;a1 points to the beginning of ImX buffer
					;on the second stage we will calculate
					;two butterflies, first of which looks like
					;butterfly on the first stage
					;xr0 = ar0 + br0
					;xi0 = ai0 + bi0
					;yr0 = ar0 - br0
					;yi0 = ai0 - bi0,
					;and second looks like
					;xr1 = ar1 + bi1
					;xi1 = ai1 - br1
					;yr1 = ar1 - bi1
					;yi1 = ai1 + br1
second_stage:
	move.l	(a0),d0			;d0 = ar0,ar1
	move.l	d0,d1			;d1 = ar0,ar1
	swap	d0			;d0 = ar1,ar0
	ext.l	d0			;d0 = ar0
	ext.l	d1			;d1 = ar1
	move.l	(4,a0),d2		;d2 = br0,br1
	move.l	d2,d3			;d3 = br0,br1
	swap	d2			;d2 = br1,br0
	ext.l	d2			;d2 = br0
	ext.l	d3			;d3 = br1
	move.l	(a1),d4			;d4 = ai0,ai1
	move.l	d4,d5			;d5 = ai0,ai1
	swap	d4			;d4 = ai1,ai0
	ext.l	d4			;d4 = ai0
	ext.l	d5			;d5 = ai1
	move.l	(4,a1),d6		;d6 = bi0,bi1
	move.l	d6,d7			;d7 = bi0,bi1
	swap	d6			;d6 = bi1,bi0
	ext.l	d6			;d6 = bi0
	ext.l	d7			;d7 = bi1
	movea.l	d0,a2			;a2 = ar0
	movea.l	d1,a3			;a3 = ar1
	movea.l	d4,a4			;a4 = ai0
	movea.l	d5,a5			;a5 = ai1
	add.l	d2,d0			;xr0 = ar0 + br0
	move.w	d0,(a0)+
	add.l	d7,d1			;xr1 = ar1 + bi1
	move.w	d1,(a0)+
	suba.l	d2,a2			;yr0 = ar0 - br0
	move.w	a2,(a0)+
	suba.l	d7,a3			;yr1 = ar1 - bi1
	move.w	a3,(a0)+
	add.l	d6,d4			;xi0 = ai0 + bi0
	move.w	d4,(a1)+
	sub.l	d3,d5			;xi1 = ai1 - br1
	move.w	d5,(a1)+
	suba.l	d6,a4			;yi0 = ai0 - bi0
	move.w	a4,(a1)+
	adda.l	d3,a5			;yi1 = ai1 + br1
	move.w	a5,(a1)+
	addq.l	#1,a6;
	cmpa.l	#128,a6
	bcs.b	second_stage



	move.l	#64,d0			;FFT for complex values
	move.l	d0,(64,a7)		;starts from 3-rd stage
					;(64 sub DFTs of 8 points each)
	moveq.l	#2,d0
	move.l	d0,(68,a7)		;stage loop counter (starts from 3rd stage)
	movea.l	#8,a5			;a5 contains the number of butterflies
					;per one sub DFT multiplied
					;by 2 (the size of values)
					;it is also the byte distance between the
					;"a" and "b" inputs of one butterfly
	move.l	#512,d6			;step in the table of twiddle factors
					;(multiplied by 2 because of the size
					;of coefficients)
	movea.l	#0,a6			;counter for butterfly loop

					;configure the MAC unit for the butterflies
					;NOTE(review): 0x70 presumably selects signed
					;fractional mode with rounding - confirm against
					;the MACSR bit layout of the target core

 move.l		#0x00000070,MACSR // used to emulate MAC with EMAC

					;clear d2 and d7: the butterfly loop loads
					;only their low words with move.w
   	clr.l d2
   	clr.l d7


next_stage:				;start of stages loop
	moveq.l	#0,d0
	move.l	d0,(60,a7)		;sub DFT loop counter
	movem.l	(76,a7),a0-a1		;a0 points to ar0, a1 points to ai0
	movea.l	a0,a2
	movea.l	a1,a3
	adda.l	a5,a2			;a2 points to br0
	adda.l	a5,a3			;a3 points to bi0
next_subDFT:				;start of sub DFTs loop
	movea.l	#TF_table,a4		;a4 points to the beginning of the table

					;butterfly computed below:
					;xr = ar - br*wr - bi*wi	yr = 2*ar - xr
					;xi = ai + br*wi - bi*wr	yi = 2*ai - xi
					;only the low 16 bits of each result are stored
next_bf:				;start of butterflies loop
	move.l	(a4),d0			;wr -> MSW of d0
	                        ;wi -> LSW of d0
	move.w	(a0),d2			;ar -> LSW of d2
	move.l	(a2),d4			;br -> MSW of d4 (LSW = following sample, unused)
	move.w (a1),d7			;ai -> LSW of d7

	move.l	#0,ACC
	msacl.w	d0.u,d4.u,<<,(a3),d5	;ACC = -br*wr; long at (a3) -> d5, bi in MSW of d5
	msac.w	d0.l,d5.u,<<,ACC	;ACC = -br*wr - bi*wi
	move.l	ACC,d3			;d3 = result read back from ACC

	add.l d2,d3			;d3 = ar + ACC = xr
	move.w	d3,(a0)+		;xr -> memory

	add.l	d2,d2			;2*ar -> d2
	sub.l	d3,d2			;2*ar-xr = yr -> d2
	move.w	d2,(a2)+		;yr -> memory

	move.l	#0,ACC
	mac.w	d0.l,d4.u,<<	;ACC = br*wi
	msac.w	d0.u,d5.u,<<	;ACC = br*wi - bi*wr

	move.l	ACC,d3
	add.l d7,d3			;d3 = ai + ACC = xi
	move.w	d3,(a1)+		;xi -> memory

	add.l	d7,d7			;2*ai -> d7
	sub.l	d3,d7			;2*ai-xi = yi -> d7
	move.w	d7,(a3)+		;yi -> memory

	adda.l	d6,a4			;modify pointer to the twiddle factor
					;for the next butterfly
	addq.l	#2,a6
	cmpa.l	a5,a6
	bcs.b	next_bf			;end of butterflies loop
					;of the current sub DFT
	move.l	#0,a6
	adda.l	a5,a0			;a0 - a3 point to the input values
	adda.l	a5,a1			;for the first butterfly
	adda.l	a5,a2			;of the next sub DFT
	adda.l	a5,a3

	move.l	(60,a7),d0
	addq.l	#1,d0
	move.l	d0,(60,a7)		;increment sub DFT loop counter
	cmp.l	(64,a7),d0		;compare sub DFT loop counter with
					;the number of sub DFTs on this stage
	bcs.w	next_subDFT		;end of sub DFTs loop

    moveq.l	#0,d0
	move.l	d0,(60,a7)		;store 0 to the sub DFT loop counter
	adda.l	a5,a5			;multiply contents of a5 (the number of
					;butterflies per one sub DFT) by 2 for the
					;next stage
	lsr.l	#1,d6			;divide step in the table of twiddle
					;factors by 2
	move.l	(64,a7),d0		;divide the number of sub DFTs for the
	lsr.l	#1,d0			;next stage by 2
	move.l	d0,(64,a7)
	move.l	(68,a7),d0		;increment stage loop counter
	addq.l	#1,d0
	move.l	d0,(68,a7)
	cmpi.l	#9,d0			;counter values 2..8 = stages 3..9
	bcs.w	next_stage		;end of stage loop


;even/odd frequency domain decomposition
;Corresponding C code:
	;nm1=smpl_num-1;
	;nd2=smpl_num>>1;
	;n4=(smpl_num>>2);
	;for (i=1;i<n4;i++){
	;    im=nd2-i;
	;    ip2=i+nd2;
	;    ipm=im+nd2;
	;
	;    ReX[ip2]=(ImX[i]+ImX[im])/2;
	;    ReX[ipm]=ReX[ip2];
	;
	;    ImX[ip2]=(ReX[im]-ReX[i])/2;
	;    ImX[ipm]=-ImX[ip2];
	;
	;    ReX[i]=(ReX[i]+ReX[im])/2;
	;    ReX[im]=ReX[i];
	;
	;    ImX[i]=(ImX[i]-ImX[im])/2;
	;    ImX[im]=-ImX[i];
	;}
	;n34=(smpl_num*3)>>2;
	;ReX[n34]=ImX[n4];
	;ReX[nd2]=ImX[0];
	;ImX[n34]=0;
	;ImX[nd2]=0;
	;ImX[n4]=0;
	;ImX[0]=0;

	movem.l	(76,a7),a0/a1		;even/odd frequency domain decomposition
	lea	(2,a0),a2		;a2 = &ReX[i], i = 1
	lea	(2,a1),a3		;a3 = &ImX[i]
	lea	(1024,a0),a4		;a4 = &ReX[nd2], pre-decremented to &ReX[im]
	lea	(1024,a1),a5		;a5 = &ImX[nd2], pre-decremented to &ImX[im]
	lea	(1026,a0),a6		;a6 = &ReX[ip2]
	move.l	#1026,d0		;d0 = byte offset of ip2 (513)
	move.l	#2046,d6		;d6 = byte offset of ipm (1023)
adjust:	move.w	(a3),d3			;d3 = ImX[i]
	ext.l	d3
	move.l	d3,d1			;d1 = ImX[i]
	move.w	-(a5),d5		;d5 = ImX[im]
	ext.l	d5
	add.l	d5,d3			;d3 = ImX[i] + ImX[im]
	asr.l	#1,d3			;d3 = (ImX[i] + ImX[im]) / 2
	move.w	d3,(a6)+		;ReX[ip2] = d3
	move.w	d3,(a0,d6.l)		;ReX[ipm] = ReX[ip2]
	move.w	-(a4),d4		;d4 = ReX[im]
	ext.l	d4
	move.l	d4,d7			;d7 = ReX[im]
	move.w	(a2),d2			;d2 = ReX[i]
	ext.l	d2
	sub.l	d2,d4			;d4 = ReX[im] - ReX[i]
	asr.l	#1,d4			;d4 = (ReX[im] - ReX[i]) / 2
	move.w	d4,(a1,d0.l)		;ImX[ip2] = d4
	neg.l	d4			;d4 = -d4
	move.w	d4,(a1,d6.l)		;ImX[ipm] = -ImX[ip2]
	add.l	d7,d2			;d2 = ReX[i] + ReX[im]
	asr.l	#1,d2			;d2 = (ReX[i] + ReX[im]) / 2
	move.w	d2,(a2)+		;ReX[i] = d2
	move.w	d2,(a4)			;ReX[im] = ReX[i]
	sub.l	d5,d1			;d1 = ImX[i] - ImX[im]
	asr.l	#1,d1			;d1 = (ImX[i] - ImX[im]) / 2
	move.w	d1,(a3)+		;ImX[i] = d1
	neg.l	d1			;d1 = -d1
	move.w	d1,(a5)			;ImX[im]=-ImX[i];
	addq.l	#2,d0			;loop processing
	subq.l	#2,d6
	cmpi.l	#1536,d0		;stop when ip2 reaches n34 (768), i.e. i = n4
	bcs.b	adjust
	moveq.l	#0,d0
	move.w	(512,a1),(1536,a0)	;ReX[n34]=ImX[n4];
	move.w	(a1),(1024,a0)		;ReX[nd2]=ImX[0];
	move.w	d0,(1536,a1)		;ImX[n34]=0;
	move.w	d0,(1024,a1)		;ImX[nd2]=0;
	move.w	d0,(512,a1)		;ImX[n4]=0;
	move.w	d0,(a1)			;ImX[0]=0;

	movea.l	#1024,a5		;the last stage of FFT
					;(512 butterflies, a/b inputs 1024 bytes apart)
	movea.l	#TF_table,a4		;a4 points to the first twiddle factor
	movea.l	a0,a2			;a0 points to ar0
	movea.l	a1,a3			;a1 points to ai0
	adda.l	a5,a2			;a2 points to br0
	adda.l	a5,a3			;a3 points to bi0
	move.l	#0,a6			;counter for butterfly loop

					;unlike the earlier stages, both outputs of
					;this stage are halved before being stored:
					;x' = (a + ACC)/2, y' = a - x'
					;NOTE(review): the long reads at (a2) and (a3)
					;fetch one word beyond sample 1023 on the last
					;butterfly; only the upper word is used, but the
					;access itself goes past the 1024-sample range
					;NOTE(review): the two MAC instructions of the
					;real part carry no "<<" scale factor while those
					;of the imaginary part do - confirm this
					;asymmetry is intended
fin_stage:
	move.l	(a4)+,d0		;wr -> MSW of d0
        					;wi -> LSW of d0

	move.l (a2),d4			;br -> MSW of d4
	move.w (a0),d2			;ar -> LSW of d2
	move.w (a1),d7			;ai -> LSW of d7

	ext.l d2			;d2 = ar, sign extended
	ext.l d7			;d7 = ai, sign extended

	move.l	#0,ACC
	msacl.w	d0.u,d4.u,(a3),d5	;ACC = -br*wr; long at (a3) -> d5, bi in MSW of d5
	msac.w	d0.l,d5.u	;ACC = -br*wr - bi*wi
	move.l	ACC,d3

	add.l d2,d3			;d3 = ar + ACC
	ext.l d3			;keep the low 16 bits, sign extended
	asr.l #1,d3			;halve
	move.w	d3,(a0)+		;halved xr -> memory

	sub.l	d3,d2			;ar - (halved xr) = halved yr -> d2
	move.w	d2,(a2)+		;yr -> memory

	move.l	#0,ACC
	mac.w	d0.l,d4.u,<<	;ACC = br*wi
	msac.w	d0.u,d5.u,<<	;ACC = br*wi - bi*wr
	move.l	ACC,d3

    add.l d7,d3				;d3 = ai + ACC
	ext.l d3			;keep the low 16 bits, sign extended
	asr.l #1,d3			;halve
	move.w	d3,(a1)+					;xi -> memory

	sub.l	d3,d7			;ai - (halved xi) = halved yi -> d7
	move.w	d7,(a3)+		;yi -> memory

	adda.l	#2,a6
	cmpa.l	a5,a6
	bcs.b	fin_stage		;end of butterfly loop

					;MACSR is set to 0 here, not to the value it
					;had when the routine was entered
	move.l		#0x00000000,MACSR

	movem.l	(a7),d0-d7/a0-a6	;restore all registers
	lea	+72(a7),a7		;release the frame
        rts

;******************************************************************************
;Inversed Real FFT
;******************************************************************************
;Upon entry, REX[ ] and IMX[ ] contain the real and imaginary parts of the
;frequency domain running from index 0 to 512. The remaining samples in
;REX[ ] and IMX[ ] are ignored.
;Upon return, REX[ ] contains the real time domain.
;
;Arguments (on the stack at entry):
;	4(a7)  = address of REX[ ]
;	8(a7)  = address of IMX[ ]
;	12(a7) = third longword, copied through unchanged to _fft16_mac
;
;Only d0-d2/a0-a1 are modified by this routine itself, so the address
;registers a2/a3 of the caller are left intact. The call to _fft16_mac
;additionally overwrites ACC and leaves MACSR = 0.
;******************************************************************************
_inv_fft16_mac:
;make frequency domain symmetrical
;Corresponding C code:
	;n=smpl_num;
	;for (i=((n>>1)+1);i<n;i++){
	;    ReX[i]=ReX[n-i];
	;    ImX[i]=-ImX[n-i];
	;}

	movem.l	(4,a7),a0-a1			;a0 = &ReX[0], a1 = &ImX[0]
	move.l	#1026,d1			;d1 = byte offset of i, first i = 513
	move.l	#1022,d2			;d2 = byte offset of n-i, first n-i = 511
movneg:
	move.w	(a0,d2.l),d0
	move.w	d0,(a0,d1.l)			;ReX[i]=ReX[n-i];
	move.w	(a1,d2.l),d0
	neg.l	d0				;only the low word is stored
	move.w	d0,(a1,d1.l)			;ImX[i]=-ImX[n-i];
	addq.l	#2,d1				;next i
	subq.l	#2,d2				;next n-i; becomes 0 after i = 1023
	bne.b	movneg				;511 passes in total

;add real and imaginary parts together
;Corresponding C code:
	;for (i=0;i<n;i++){
	;    ReX[i]=ReX[i]+ImX[i];
	;}

	moveq.l	#0,d0				;add real and imaginary parts
	movem.l	(4,a7),a0-a1			;together
sum:
	move.w	(a0),d1				;d1 = ReX[i]
	ext.l	d1
	move.w	(a1)+,d2			;d2 = ImX[i]
	ext.l	d2
	add.l	d2,d1				;d1 = ReX[i] + ImX[i]
	move.w	d1,(a0)+			;ReX[i] = d1
	addq.l	#1,d0				;loop processing
	cmp.l	#1024,d0
	bcs.b	sum

						;calculate forward real FFT
						;each push lowers a7 by 4, so the same
						;12(a7) displacement picks up the third,
						;second and first argument in turn
	move.l	(12,a7),-(a7)			;third longword
	move.l	(12,a7),-(a7)			;address of ImX[]
	move.l	(12,a7),-(a7)			;address of ReX[]
	jsr	_fft16_mac
	add.l	#12,a7				;drop the three pushed longwords

;add real and imaginary parts together
;Corresponding C code of the reference algorithm:
;for (i=0;i<n;i++){
;    ReX[i]=(ReX[i]+ImX[i])/n;
;}
;NOTE(review): the loop below only adds; it performs no division by n.
;Confirm whether the scaling is meant to be covered elsewhere (the last
;stage of _fft16_mac halves its outputs) or left to the caller.

	moveq.l	#0,d0				;add real and imaginary parts
	movem.l	(4,a7),a0-a1			;together
norm:
	move.w	(a0),d1				;d1 = ReX[i] (low word)
	move.w	(a1)+,d2			;d2 = ImX[i] (low word)
	add.l	d2,d1				;low word of d1 = ReX[i] + ImX[i]
	move.w	d1,(a0)+			;ReX[i] = low word of d1
	addq.l	#1,d0				;loop processing
	cmp.l	#1024,d0
	bcs.b	norm
	rts
